#### installing packages if not installed ####

list.of.packages <- c('dplyr',
                      'estimatr',
                      'xtable',
                      'tidyverse',
                      'haven',
                      'readxl',
                      'ggthemes',
                      'rdrobust',
                      'gridExtra',
                      'lubridate')
new.packages <- 
  list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

#### purpose: analyzing san diego dataset #### 

#### attach packages ####

suppressPackageStartupMessages(
  
  {
    
    library(dplyr)
    library(estimatr)
    library(xtable)
    library(tidyverse)
    library(haven)
    library(readxl)
    library(ggthemes)
    library(rdrobust)
    library(gridExtra)
    library(lubridate)
    
  }
  
)

#### loading in data #### 

load(file = "data/sd_count_ts.RData")
load(file = "data/sd_hit_ts.RData")
load(file = "data/sd_arrest_ts.RData")
load(file = "data/sd_race_ts.RData")
load(file = "data/sd_call_ts.RData")

#### analyzing stop data #### 

# rdit analysis

sd_count_ts$blmrv = 
  sd_count_ts$date - min(sd_count_ts$date[sd_count_ts$blm == 1])

rdmod_count = 
  with(sd_count_ts, rdrobust(x = blmrv, y = count, p = 1, kernel = "uni"))

# stops over time 

sdplot1 = sd_count_ts %>% 
  filter(date >= as.Date("2020-05-28")-60) %>% 
  filter(date <= as.Date("2020-05-28")+60) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-05-28"),
             linetype = 2) + 
  geom_point(aes(x = date, y = count),
             alpha = .3,
             size = .5) + 
  geom_smooth(aes(x = date, y = count, group = blm),
              se = FALSE,
              col = "black",
              size = .4) + 
  labs(x = "Date", y = "Terry Stops",
       title = "A. Terry Stops Over Time") + 
  annotate("text",
           label = paste0("Est: ", round(rdmod_count$coef[1], 0),
                          "\nSE: ", round(rdmod_count$se[3], 0),
                          "\np < 0.001"),
           x = as.Date("2020-04-15"),
           y = 175,
           family = "serif",
           size = 2.25) + 
  theme_tufte(base_size = 7) 

# assessing persistence

persist = 100

mat_persist = 
  matrix(NA, nrow = persist, ncol = 4) %>% 
  as.data.frame %>% 
  `colnames<-` (c("est", "se", "pv", "dayscut")) %>% 
  mutate(est = as.numeric(est),
         se = as.numeric(se),
         pv = as.numeric(pv),
         dayscut = as.numeric(dayscut))

for (i in 1:persist) {
  
  print(paste0("I iteration ", i))
  persist_df = sd_count_ts %>% filter(blmrv > (i-1) | blmrv < 0)
  persist_df$blmrv = 
    ifelse(persist_df$blmrv > -1, persist_df$blmrv - i, persist_df$blmrv)
  
  rdmod_count_persist = 
    with(persist_df, rdrobust(x = blmrv, y = count, p = 1, kernel = "uni"))
  
  mat_persist[i, 1] = rdmod_count_persist$coef[1]
  mat_persist[i, 2] = rdmod_count_persist$se[3]
  mat_persist[i, 3] = rdmod_count_persist$pv[3]
  mat_persist[i, 4] = i
  
}

sdplot2 = mat_persist %>% 
  ggplot() + 
  geom_point(aes(x = dayscut, y = est),
             size = .3) + 
  geom_errorbar(aes(x = dayscut, 
                    ymin = est - 1.96 * se,
                    ymax = est + 1.96 * se),
                width = 0,
                size = .05) +
  geom_errorbar(aes(x = dayscut, 
                    ymin = est - 1.645 * se,
                    ymax = est + 1.645 * se),
                width = 0,
                size = .1) +
  geom_hline(yintercept = 0) + 
  labs(x = "Days Cut Post-BLM Protest",
       y = "RDiT Coefficient\n(Post-BLM Protest)",
       title = "B. Post-BLM Effect Persistence\n(Terry Stops)") + 
  theme_tufte(base_size = 7)

#### analyzing hit rate data #### 

# rdit analysis

sd_hit_ts$blmrv = 
  sd_hit_ts$date - min(sd_hit_ts$date[sd_hit_ts$blm == 1])

rdmod_hit = 
  with(sd_hit_ts, rdrobust(x = blmrv, y = hit, p = 1, kernel = "uni"))

# plot

sdplot3 = sd_hit_ts %>% 
  filter(date >= as.Date("2020-05-28")-60) %>% 
  filter(date <= as.Date("2020-05-28")+60) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-05-28"),
             linetype = 2) + 
  geom_point(aes(x = date, y = hit),
             alpha = .3,
             size = .5) + 
  geom_smooth(aes(x = date, y = hit, group = blm),
              se = FALSE,
              col = "black",
              size = .4) + 
  labs(x = "Date", y = "Hit Rate",
       title = "C. Hit Rates Over Time") + 
  annotate("text",
           label = paste0("Est: ", round(rdmod_hit$coef[1], 2),
                          "\nSE: ", round(rdmod_hit$se[3], 2),
                          "\np: ", round(rdmod_hit$pv[3], 2)),
           x = as.Date("2020-04-15"),
           y = .3,
           family = "serif",
           size = 2.25) + 
  theme_tufte(base_size = 7) 

# assessing persistence

persist = 100

mat_persist = 
  matrix(NA, nrow = persist, ncol = 4) %>% 
  as.data.frame %>% 
  `colnames<-` (c("est", "se", "pv", "dayscut")) %>% 
  mutate(est = as.numeric(est),
         se = as.numeric(se),
         pv = as.numeric(pv),
         dayscut = as.numeric(dayscut))

for (i in 1:persist) {
  
  print(paste0("I iteration ", i))
  
  persist_df = sd_hit_ts %>% filter(blmrv > (i-1) | blmrv < 0)
  persist_df$blmrv = 
    ifelse(persist_df$blmrv > -1, persist_df$blmrv - i, persist_df$blmrv)
  
  rdmod_count_persist = 
    with(persist_df, rdrobust(x = blmrv, y = hit, p = 1, kernel = "uni"))
  
  mat_persist[i, 1] = rdmod_count_persist$coef[1]
  mat_persist[i, 2] = rdmod_count_persist$se[3]
  mat_persist[i, 3] = rdmod_count_persist$pv[3]
  mat_persist[i, 4] = i
  
}

sdplot4 = mat_persist %>% 
  ggplot() + 
  geom_point(aes(x = dayscut, y = est),
             size = .3) + 
  geom_errorbar(aes(x = dayscut, 
                    ymin = est - 1.96 * se,
                    ymax = est + 1.96 * se),
                width = 0,
                size = .05) +
  geom_errorbar(aes(x = dayscut, 
                    ymin = est - 1.645 * se,
                    ymax = est + 1.645 * se),
                width = 0,
                size = .1) +
  geom_hline(yintercept = 0) + 
  labs(x = "Days Cut Post-BLM Protest",
       y = "RDiT Coefficient\n(Post-BLM Protest)",
       title = "D.Post-BLM Effect Persistence\n(Hit Rates)") + 
  theme_tufte(base_size = 7)

#### analyzing arrest rate data #### 

# rdit analysis

sd_arrest_ts$blmrv = 
  sd_arrest_ts$date - min(sd_arrest_ts$date[sd_arrest_ts$blm == 1])

rdmod_arrest = 
  with(sd_arrest_ts, rdrobust(x = blmrv, y = arrest, p = 1, kernel = "uni"))

# plot

sdplot5 = sd_arrest_ts %>% 
  filter(date >= as.Date("2020-05-28")-60) %>% 
  filter(date <= as.Date("2020-05-28")+60) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-05-28"),
             linetype = 2) + 
  geom_point(aes(x = date, y = arrest),
             alpha = .3,
             size = .5) + 
  geom_smooth(aes(x = date, y = arrest, group = blm),
              se = FALSE,
              col = "black",
              size = .4) + 
  labs(x = "Date", y = "Arrest Rate",
       title = "E. Arrest Rates Over Time") + 
  annotate("text",
           label = paste0("Est: ", round(rdmod_arrest$coef[1], 2),
                          "\nSE: ", round(rdmod_arrest$se[3], 2),
                          "\np < 0.001 "),
           x = as.Date("2020-04-15"),
           y = .3,
           family = "serif",
           size = 2.25) + 
  theme_tufte(base_size = 7) 

# assessing persistence

persist = 100

mat_persist = 
  matrix(NA, nrow = persist, ncol = 4) %>% 
  as.data.frame %>% 
  `colnames<-` (c("est", "se", "pv", "dayscut")) %>% 
  mutate(est = as.numeric(est),
         se = as.numeric(se),
         pv = as.numeric(pv),
         dayscut = as.numeric(dayscut))

for (i in 1:persist) {
  
  print(paste0("I iteration ", i))
  
  persist_df = sd_arrest_ts %>% filter(blmrv > (i-1) | blmrv < 0)
  persist_df$blmrv = 
    ifelse(persist_df$blmrv > -1, persist_df$blmrv - i, persist_df$blmrv)
  
  rdmod_count_persist = 
    with(persist_df, rdrobust(x = blmrv, y = arrest, p = 1, kernel = "uni"))
  
  mat_persist[i, 1] = rdmod_count_persist$coef[1]
  mat_persist[i, 2] = rdmod_count_persist$se[3]
  mat_persist[i, 3] = rdmod_count_persist$pv[3]
  mat_persist[i, 4] = i
  
}

sdplot6 = mat_persist %>% 
  ggplot() + 
  geom_point(aes(x = dayscut, y = est),
             size = .3) + 
  geom_errorbar(aes(x = dayscut, 
                    ymin = est - 1.96 * se,
                    ymax = est + 1.96 * se),
                width = 0,
                size = .05) +
  geom_errorbar(aes(x = dayscut, 
                    ymin = est - 1.645 * se,
                    ymax = est + 1.645 * se),
                width = 0,
                size = .1) +
  geom_hline(yintercept = 0) + 
  labs(x = "Days Cut Post-BLM Protest",
       y = "RDiT Coefficient\n(Post-BLM Protest)",
       title = "F. Post-BLM Effect Persistence\n(Arrest Rates)") + 
  theme_tufte(base_size = 7)

#### analyzing race data #### 

# rdit analysis

sd_race_ts$blmrv = 
  sd_race_ts$date - min(sd_race_ts$date[sd_race_ts$blm == 1])

rdmod_race1 = 
  with(sd_race_ts, rdrobust(x = blmrv, y = rr_bw, p = 1, kernel = "uni"))

rdmod_race2 = 
  with(sd_race_ts, rdrobust(x = blmrv, y = rr_lw, p = 1, kernel = "uni"))

# stops over time (b/w)

sdplot7 = sd_race_ts %>% 
  filter(date >= as.Date("2020-05-28")-60) %>% 
  filter(date <= as.Date("2020-05-28")+60) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-05-28"),
             linetype = 2) + 
  geom_point(aes(x = date, y = rr_bw),
             alpha = .3,
             size = .5) + 
  geom_smooth(aes(x = date, y = rr_bw, group = blm),
              se = FALSE,
              col = "black",
              size = .4) + 
  labs(x = "Date", y = "Terry Stops",
       title = "G. B/W Rate Ratio Over Time") + 
  annotate("text",
           label = paste0("Est: ", round(rdmod_race1$coef[1], 2),
                          "\nSE: ", round(rdmod_race1$se[3], 2),
                          "\np < 0.001 "),
           x = as.Date("2020-07-01"),
           y = 1,
           family = "serif",
           size = 2.25) + 
  theme_tufte(base_size = 7) 

# persistence (b/w)

persist = 100

mat_persist = 
  matrix(NA, nrow = persist, ncol = 4) %>% 
  as.data.frame %>% 
  `colnames<-` (c("est", "se", "pv", "dayscut")) %>% 
  mutate(est = as.numeric(est),
         se = as.numeric(se),
         pv = as.numeric(pv),
         dayscut = as.numeric(dayscut))

for (i in 1:persist) {
  
  print(paste0("I iteration ", i))
  
  persist_df = sd_race_ts %>% filter(blmrv > (i-1) | blmrv < 0)
  persist_df$blmrv = 
    ifelse(persist_df$blmrv > -1, persist_df$blmrv - i, persist_df$blmrv)
  
  rdmod_count_persist = 
    with(persist_df, rdrobust(x = blmrv, y = rr_bw, p = 1, kernel = "uni"))
  
  mat_persist[i, 1] = rdmod_count_persist$coef[1]
  mat_persist[i, 2] = rdmod_count_persist$se[3]
  mat_persist[i, 3] = rdmod_count_persist$pv[3]
  mat_persist[i, 4] = i
  
}

sdplot8 = mat_persist %>% 
  ggplot() + 
  geom_point(aes(x = dayscut, y = est),
             size = .3) + 
  geom_errorbar(aes(x = dayscut, 
                    ymin = est - 1.96 * se,
                    ymax = est + 1.96 * se),
                width = 0,
                size = .05) +
  geom_errorbar(aes(x = dayscut, 
                    ymin = est - 1.645 * se,
                    ymax = est + 1.645 * se),
                width = 0,
                size = .1) +
  geom_hline(yintercept = 0) + 
  labs(x = "Days Cut Post-BLM Protest",
       y = "RDiT Coefficient\n(Post-BLM Protest)",
       title = "H. Post-BLM Effect Persistence\n(B/W Rate Ratio)") + 
  theme_tufte(base_size = 7)

# stops over time (l/w)

sdplot9 = sd_race_ts %>% 
  filter(date >= as.Date("2020-05-28")-60) %>% 
  filter(date <= as.Date("2020-05-28")+60) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-05-28"),
             linetype = 2) + 
  geom_point(aes(x = date, y = rr_lw),
             alpha = .3,
             size = .5) + 
  geom_smooth(aes(x = date, y = rr_lw, group = blm),
              se = FALSE,
              col = "black",
              size = .4) + 
  labs(x = "Date", y = "Terry Stops",
       title = "I. L/W Rate Ratio Over Time") + 
  annotate("text",
           label = paste0("Est: ", round(rdmod_race2$coef[1], 2),
                          "\nSE: ", round(rdmod_race2$se[3], 2),
                          "\np = ", round(rdmod_race2$pv[3], 2)),
           x = as.Date("2020-07-01"),
           y = 6,
           family = "serif",
           size = 2.25) + 
  theme_tufte(base_size = 7) 

# persistence (l/w)

persist = 100

mat_persist = 
  matrix(NA, nrow = persist, ncol = 4) %>% 
  as.data.frame %>% 
  `colnames<-` (c("est", "se", "pv", "dayscut")) %>% 
  mutate(est = as.numeric(est),
         se = as.numeric(se),
         pv = as.numeric(pv),
         dayscut = as.numeric(dayscut))

for (i in 1:persist) {
  
  print(paste0("I iteration ", i))
  
  persist_df = sd_race_ts %>% filter(blmrv > (i-1) | blmrv < 0)
  persist_df$blmrv = 
    ifelse(persist_df$blmrv > -1, persist_df$blmrv - i, persist_df$blmrv)
  
  rdmod_count_persist = 
    with(persist_df, rdrobust(x = blmrv, y = rr_lw, p = 1, kernel = "uni"))
  
  mat_persist[i, 1] = rdmod_count_persist$coef[1]
  mat_persist[i, 2] = rdmod_count_persist$se[3]
  mat_persist[i, 3] = rdmod_count_persist$pv[3]
  mat_persist[i, 4] = i
  
}

sdplot10 = mat_persist %>% 
  ggplot() + 
  geom_point(aes(x = dayscut, y = est),
             size = .3) + 
  geom_errorbar(aes(x = dayscut, 
                    ymin = est - 1.96 * se,
                    ymax = est + 1.96 * se),
                width = 0,
                size = .05) +
  geom_errorbar(aes(x = dayscut, 
                    ymin = est - 1.645 * se,
                    ymax = est + 1.645 * se),
                width = 0,
                size = .1) +
  geom_hline(yintercept = 0) + 
  labs(x = "Days Cut Post-BLM Protest",
       y = "RDiT Coefficient\n(Post-BLM Protest)",
       title = "J. Post-BLM Effect Persistence\n(L/W Rate Ratio)") + 
  theme_tufte(base_size = 7)

#### analyzing crime proxy data #### 

## rdit analysis
#
#sd_call_ts$blmrv = 
#  sd_call_ts$date - min(sd_call_ts$date[sd_call_ts$blm == 1])
# 
#rdmod_violent = 
#  with(sd_call_ts, rdrobust(x = blmrv,
#                            y = violent_calls, p = 1, kernel = "uni"))
#
## plot
#
#sdplot7 = sd_call_ts %>% 
#  filter(date >= as.Date("2020-05-28")-60) %>% 
#  filter(date <= as.Date("2020-05-28")+60) %>% 
#  ggplot() + 
#  geom_vline(xintercept = as.Date("2020-05-28"),
#             linetype = 2) + 
#  geom_point(aes(x = date, y = violent_calls),
#             alpha = .3,
#             size = .5) + 
#  geom_smooth(aes(x = date, y = violent_calls, group = blm),
#              se = FALSE,
#              col = "black",
#              size = .6) + 
#  labs(x = "Date", y = "Reported Violent Crimes",
#       title = "G. Violent Crime Over Time") + 
#  annotate("text",
#           label = paste0("Est: ", round(rdmod_violent$coef[1], 0),
#                          "\nSE: ", round(rdmod_violent$se[3], 0),
#                          "\np < 0.001 "),
#           x = as.Date("2020-04-15"),
#           y = 62,
#           family = "serif",
#           size = 2.25) + 
#  theme_tufte(base_size = 7) 
#
#
#sd_call_ts %>% 
#  filter(date >= as.Date("2019-01-28")-60) %>% 
#  filter(date <= as.Date("2020-05-28")+60) %>% 
#  ggplot() + 
#  geom_vline(xintercept = as.Date("2020-05-28"),
#             linetype = 2) + 
#  geom_point(aes(x = date, y = violent_calls),
#             alpha = .3,
#             size = .5) + 
#  geom_smooth(aes(x = date, y = violent_calls, group = blm),
#              se = FALSE,
#              col = "black",
#              size = .6) + 
#  labs(x = "Date", y = "Reported Violent Crimes",
#       title = "G. Violent Crime Over Time") + 
#  annotate("text",
#           label = paste0("Est: ", round(rdmod_violent$coef[1], 0),
#                          "\nSE: ", round(rdmod_violent$se[3], 0),
#                          "\np < 0.001 "),
#           x = as.Date("2020-04-15"),
#           y = 62,
#           family = "serif",
#           size = 2.25) + 
#  theme_tufte(base_size = 7) 
#
## assessing persistence
#
#persist = 100
#
#mat_persist = 
#  matrix(NA, nrow = persist, ncol = 4) %>% 
#  as.data.frame %>% 
#  `colnames<-` (c("est", "se", "pv", "dayscut")) %>% 
#  mutate(est = as.numeric(est),
#         se = as.numeric(se),
#         pv = as.numeric(pv),
#         dayscut = as.numeric(dayscut))
#
#for (i in 1:persist) {
#  
#  print(paste0("I iteration ", i))
#  
#  persist_df = sd_call_ts %>% filter(blmrv > (i-1) | blmrv < 0)
#  persist_df$blmrv = 
#    ifelse(persist_df$blmrv > -1, persist_df$blmrv - i, persist_df$blmrv)
#  
#  rdmod_count_persist = 
#    with(persist_df, rdrobust(x = blmrv, y = violent_calls, p = 1, kernel = "uni"))
#  
#  mat_persist[i, 1] = rdmod_count_persist$coef[1]
#  mat_persist[i, 2] = rdmod_count_persist$se[3]
#  mat_persist[i, 3] = rdmod_count_persist$pv[3]
#  mat_persist[i, 4] = i
#  
#}
#
#sdplot8 = mat_persist %>% 
#  ggplot() + 
#  geom_point(aes(x = dayscut, y = est),
#             size = .3) + 
#  geom_errorbar(aes(x = dayscut, 
#                    ymin = est - 1.96 * se,
#                    ymax = est + 1.96 * se),
#                width = 0,
#                size = .05) +
#  geom_errorbar(aes(x = dayscut, 
#                    ymin = est - 1.645 * se,
#                    ymax = est + 1.645 * se),
#                width = 0,
#                size = .1) +
#  geom_hline(yintercept = 0) + 
#  labs(x = "Days Cut Post-BLM Protest",
#       y = "RDiT Coefficient\n(Post-BLM Protest)",
#       title = "H. Post-BLM Effect Persistence\n(Violent Crime)") + 
#  theme_tufte(base_size = 7)

sd_grob = arrangeGrob(sdplot1, sdplot2, sdplot3, sdplot4,
            sdplot5, sdplot6, sdplot7, sdplot8,
            grid::nullGrob(), sdplot9, sdplot10, grid::nullGrob(),
            layout_matrix = matrix(c(1, 2, 3, 4,
                                     5, 6, 7, 8,
                                     9, 10, 11, 12), ncol = 4, nrow = 3, byrow = TRUE))


ggsave(plot = sd_grob, filename = "pics/sdres.png", width = 8, height = 6)
